🆕 程式碼
/**
 * Quantize a float vector whose components are nominally in [-1, 1] to int8.
 *
 * Each component is clamped to [-1, 1], scaled by 127 and rounded to the
 * nearest integer, so every output value lies in [-127, 127]. NaN components
 * end up as 0.
 *
 * @param {number[]|Float32Array|Float64Array} [vec=[]] - Source vector.
 * @returns {Int8Array} Quantized vector with the same length as `vec`.
 */
export function quantizeVector(vec = []) {
  // Round explicitly: storing a fractional number into an Int8Array truncates
  // toward zero, which would bias every component toward 0 and double the
  // worst-case quantization error (e.g. 0.999 would become 126, not 127).
  return Int8Array.from(vec, (v) => Math.round(Math.max(-1, Math.min(1, v)) * 127));
}
/**
 * Map an int8-quantized vector back to approximate float components.
 *
 * Every element is divided by 127, the scale applied during quantization.
 *
 * @param {Int8Array|number[]} qvec - Quantized vector.
 * @returns {number[]} Plain array of approximate float values.
 */
export function dequantizeVector(qvec) {
  const scale = 127;
  const restore = (level) => level / scale;
  return Array.from(qvec, restore);
}
/**
 * Approximate cosine similarity between an int8-quantized vector and a float
 * vector. Scores stay close to the unquantized cosine, so relative ranking is
 * largely preserved.
 *
 * The quantized components are rescaled by 1/127 on the fly instead of first
 * materializing a dequantized copy, so no array is allocated per comparison.
 *
 * @param {Int8Array|number[]} qv - Quantized vector (array-like with `length`).
 * @param {number[]|Float32Array} fv - Float vector of the same dimension.
 * @returns {number} Cosine similarity; 0 when either vector has zero norm.
 * @throws {RangeError} If the two vectors have different lengths.
 */
export function cosineQ(qv, fv) {
  // A dimension mismatch would otherwise yield NaN (or a score computed on a
  // prefix only) and silently corrupt the ranking.
  if (qv.length !== fv.length) {
    throw new RangeError(
      `cosineQ: dimension mismatch (quantized=${qv.length}, float=${fv.length})`
    );
  }
  let dot = 0;
  let normQ = 0;
  let normF = 0;
  for (let i = 0; i < qv.length; i++) {
    const q = qv[i] / 127;
    const f = fv[i];
    dot += q * f;
    normQ += q * q;
    normF += f * f;
  }
  // The small epsilon keeps the division finite for zero-norm inputs.
  return dot / (Math.sqrt(normQ) * Math.sqrt(normF) + 1e-9);
}
假設你原本有 buildIndex 和 answerWithRAG,現在加一個 buildIndexQuantized。
import { quantizeVector } from "./day26_quantize.js";
...
/**
 * Build an int8-quantized index for one tenant/namespace knowledge base.
 *
 * Reads every `.md` / `.txt` file in the knowledge-base directory, chunks it,
 * embeds the chunks, quantizes each embedding and writes the result next to
 * the regular index as `<name>.qindex.json`.
 *
 * @param {{ tenant: string, ns: string }} params - Tenant and namespace.
 * @returns {Promise<{ outFile: string, chunks: number }>} Path of the written
 *   file and the number of indexed chunks.
 * @throws {Error} If the embedder returns fewer vectors than chunks.
 */
export async function buildIndexQuantized({ tenant, ns }) {
  const { kbDir, idxFile } = ensureTenantNS(tenant, ns);
  // Only swap a trailing ".json". A plain string replace would rewrite the
  // first ".json" anywhere in the path and, when there is none, would leave
  // outFile === idxFile and overwrite the unquantized index.
  const outFile = idxFile.endsWith(".json")
    ? `${idxFile.slice(0, -".json".length)}.qindex.json`
    : `${idxFile}.qindex.json`;
  const files = fs.readdirSync(kbDir).filter((f) => /\.md$|\.txt$/i.test(f));
  const index = [];
  for (const f of files) {
    const docId = path.basename(f);
    const text = fs.readFileSync(path.join(kbDir, f), "utf-8");
    const chunks = chunkTextSmart(text);
    const vecs = await embedMany(chunks.map((c) => c.text));
    // A missing embedding would otherwise be quantized as an empty vector
    // and stored without any error.
    if ((vecs?.length ?? 0) < chunks.length) {
      throw new Error(
        `buildIndexQuantized: expected ${chunks.length} embeddings for ${docId}, got ${vecs?.length ?? 0}`
      );
    }
    chunks.forEach((c, i) => {
      index.push({
        id: `${docId}#${i}`,
        docId,
        text: c.text,
        // JSON cannot hold typed arrays, so store the int8 values as a plain array.
        vectorQ: Array.from(quantizeVector(vecs[i])),
      });
    });
  }
  // Written compactly: pretty-printing puts every vector component on its own
  // indented line, which inflates the file this index is meant to shrink.
  fs.writeFileSync(outFile, JSON.stringify({ builtAt: Date.now(), quantized: true, index }));
  return { outFile, chunks: index.length };
}
支援壓縮索引建置:
import { buildIndex, buildIndexQuantized } from "../../../../../src/day16_rag_store.js";
...
// Reindex route handler, shown as a diff: lines starting with "-" are the old
// code being removed and lines starting with "+" are the new code being added;
// those markers are not part of the JavaScript itself.
// New behavior: when the request URL carries ?quantize=1 the handler calls
// buildIndexQuantized, otherwise buildIndex, and echoes the `quantize` flag in
// the JSON response together with the builder's result.
// NOTE(review): ["editor","admin"] is presumably the list of roles withAuth
// lets through — confirm against withAuth's definition.
export const POST = withAuth(async (req, ctx) => {
  const { tenant, ns } = ctx.params;
- const out = await buildIndex({ tenant, ns });
- return NextResponse.json({ ok:true, ...out });
+ const url = new URL(req.url);
+ const quantize = url.searchParams.get("quantize")==="1";
+ const out = quantize ? await buildIndexQuantized({ tenant, ns }) : await buildIndex({ tenant, ns });
+ return NextResponse.json({ ok:true, quantize, ...out });
}, ["editor","admin"]);
// Component fragment: `useQuant` is the checkbox state for using the
// compressed (quantized) index; it starts as false and follows the toggle's
// checked value.
const [useQuant, setUseQuant] = useState(false);
...
<div className="form-control">
  <label className="label cursor-pointer">
    <span className="label-text">使用壓縮索引</span>
    <input type="checkbox" className="toggle toggle-sm" checked={useQuant} onChange={e=>setUseQuant(e.target.checked)} />
  </label>
</div>
發問時仍傳 strategy: "default",並把 useQuant 一併送到後端:若 useQuant=true,後端先嘗試載入 .qindex.json;否則(或載入失敗時)fallback 回原始索引。
▶️ 驗收流程
重建壓縮索引:
curl -X POST "http://localhost:3000/api/kb/acme/faq/reindex?quantize=1" -H "Authorization: Bearer <TOKEN>"
→ 產生 faq.qindex.json。
Studio 打勾「使用壓縮索引」,再發問。
記憶體使用量明顯下降(float32 → int8,理論上約為原本的 1/4;注意索引檔存的是一般 JSON 數字陣列,載入時需轉回 Int8Array 才能真正省下記憶體)。
答案跟原始索引差不多(可能有細微差異)。